%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% This file is part of the book
%%
%% Probability and Its Applications
%% http://code.google.com/p/probability-book/
%%
%% Copyright (C) 2010 Minh Van Nguyen <nguyenminh2@gmail.com>
%%
%% See the file COPYING for copying conditions.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\chapter{Moments and Deviations}
\index{moment}
\index{standard deviation}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Markov's inequality}
\index{Markov's inequality}

\begin{theorem}
\label{thm:moments:Markov_inequality}
\textbf{Markov's inequality.}\index{Markov's inequality}
For a random variable $X$ that takes on only nonnegative values and
for all $a > 0$, we have
\[
\Pr(X \geq a)
\leq
\frac{\E[X]}{a}.
\]
\end{theorem}

\begin{proof}
\index{Markov's inequality}
Let $X$ be a random variable such that $X \geq 0$ and let
$a > 0$. Define a random variable $I$ by
\[
I
=
\begin{cases}
1, & \text{if $X \geq a$,} \\
0, & \text{otherwise}
\end{cases}
\]
so that $\E[I] = \Pr(I = 1) = \Pr(X \geq a)$. Moreover,
%%
\begin{equation}
\label{eq:moments:binomial_random_variable_I}
I
\leq
\frac{X}{a}.
\end{equation}
%%
Inequality~\eqref{eq:moments:binomial_random_variable_I} holds because
$X \geq 0$: if $X \geq a$, then $X/a \geq 1 = I$, and otherwise
$I = 0 \leq X/a$. Taking expectations of both sides
of~\eqref{eq:moments:binomial_random_variable_I}, we obtain
\[
\E[I]
=
\Pr(X \geq a)
\leq
\E\left[\frac{X}{a}\right]
=
\frac{\E[X]}{a}
\]
where we used
Lemma~\ref{lem:discrete:linearity_constants}\index{linearity!constants}.
\end{proof}
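
As a quick illustration, suppose a fair coin is flipped $n$ times and
let $X$ count the number of heads, so that $\E[X] = n/2$. Since $X$
takes on only nonnegative values, Markov's
inequality\index{Markov's inequality} bounds the probability of seeing
at least $3n/4$ heads by
\[
\Pr\left(X \geq \frac{3n}{4}\right)
\leq
\frac{\E[X]}{3n/4}
=
\frac{n/2}{3n/4}
=
\frac{2}{3}.
\]
Markov's inequality uses only the expectation of $X$; Chebyshev's
inequality (Theorem~\ref{thm:moments:Chebyshev_inequality}) will
improve substantially on this bound once the variance of $X$ is taken
into account.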


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Variance and moments of random variables}

The $k$-th \emph{moment}\index{moment} of a random variable $X$ is
$\E[X^k]$. The \emph{variance}\index{variance} of $X$ is
%%
\begin{align*}
\Var[X]
&=
\E\big[(X - \E[X])^2\big] \\
&=
\E\big[X^2 - 2X \cdot \E[X] + (\E[X])^2\big] \\
&=
\E[X^2] - 2 \cdot \E[X] \cdot \E[X] + (\E[X])^2 \\
&=
\E[X^2] - (\E[X])^2
\end{align*}
%%
and the \emph{standard deviation}\index{standard deviation} of $X$ is
defined as
\[
\sigma(X)
=
\sqrt{\Var[X]}.
\]
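
For example, if $X$ denotes the outcome of rolling a fair six-sided
die, then $\E[X] = (1 + 2 + \cdots + 6)/6 = 7/2$ and
\[
\E[X^2]
=
\frac{1 + 4 + 9 + 16 + 25 + 36}{6}
=
\frac{91}{6}
\]
so that
\[
\Var[X]
=
\E[X^2] - (\E[X])^2
=
\frac{91}{6} - \frac{49}{4}
=
\frac{35}{12},
\qquad
\sigma(X)
=
\sqrt{\frac{35}{12}}
\approx
1.71.
\]
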
If $X$ and $Y$ are two random variables, their
\emph{covariance}\index{covariance} is
%%
\begin{align*}
\Cov(X, Y)
&=
\E\big[(X - \E[X]) (Y - \E[Y])\big] \\
&=
\E\big[(Y - \E[Y]) (X - \E[X])\big] \\
&=
\Cov(Y, X).
\end{align*}
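
In particular, taking $Y = X$ shows that
\[
\Cov(X, X)
=
\E\big[(X - \E[X])^2\big]
=
\Var[X],
\]
so the covariance of a random variable with itself is its variance.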

\begin{theorem}
\label{thm:moments:variance_of_sum_of_random_variables}
For any two random variables $X$ and $Y$, we have
\[
\Var[X + Y]
=
\Var[X] + \Var[Y] + 2 \cdot \Cov(X, Y).
\]
\end{theorem}

\begin{proof}
Use the linearity\index{linearity!expectation} of expectations, and the
definitions of variance and covariance\index{covariance}, to obtain
%%
\begin{align*}
\Var[X + Y]
&=
\E\big[(X + Y - \E[X + Y])^2\big] \\
&=
\E\big[(X + Y - \E[X] - \E[Y])^2\big] \\
&=
\E\big[(X - \E[X])^2 + (Y - \E[Y])^2 + 2(X - \E[X]) (Y - \E[Y])\big] \\
&=
\E\big[(X - \E[X])^2\big] + \E\big[(Y  - \E[Y])^2\big]
+ 2 \cdot \E\big[(X - \E[X]) (Y - \E[Y])\big] \\
&=
\Var[X] + \Var[Y] + 2 \cdot \Cov(X, Y)
\end{align*}
%%
as required.
\end{proof}

Theorem~\ref{thm:moments:variance_of_sum_of_random_variables} can be
extended to a sum of any finite number of random variables. For a
collection $X_1, \dots, X_n$ of random variables, it can be shown that
\[
\Var\left[\sum_i X_i\right]
=
\sum_i \Var[X_i] + 2 \cdot \sum_i \sum_{j > i} \Cov(X_i, X_j).
\]
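For instance, with $n = 3$ this reads
\[
\Var[X_1 + X_2 + X_3]
=
\Var[X_1] + \Var[X_2] + \Var[X_3]
+ 2\big(\Cov(X_1, X_2) + \Cov(X_1, X_3) + \Cov(X_2, X_3)\big).
\]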

\begin{theorem}
\label{thm:moments:expectation_is_multiplicative}
\index{expectation!multiplicative}
For any two independent random variables $X$ and $Y$, we have
\[
\E[X \cdot Y]
=
\E[X] \cdot \E[Y].
\]
\end{theorem}

\begin{proof}
Let the indices $i$ and $j$ assume all values in the ranges of $X$ and
$Y$, respectively. Since $X$ and $Y$ are independent random variables,
we have
%%
\begin{align*}
\E[X \cdot Y]
&=
\sum_i \sum_j ij \cdot \Pr\big((X = i) \cap (Y = j)\big) \\
&=
\sum_i \sum_j ij \cdot \Pr(X = i) \cdot \Pr(Y = j) \\
&=
\left[\sum_i i \cdot \Pr(X = i)\right]
\left[\sum_j j \cdot \Pr(Y = j)\right] \\
&=
\E[X] \cdot \E[Y]
\end{align*}
%%
as required.
\end{proof}

\begin{corollary}
For any independent random variables $X$ and $Y$, we have
\[
\Cov(X, Y)
=
0
\]
and
\[
\Var[X + Y]
=
\Var[X] + \Var[Y].
\]
\end{corollary}

\begin{proof}
Since $X$ and $Y$ are independent, so are $X - \E[X]$ and
$Y - \E[Y]$. For any random variable $Z$, we have
\[
\E\big[Z - \E[Z]\big]
=
\E[Z] - \E\big[\E[Z]\big]
=
0.
\]
Using
Theorem~\ref{thm:moments:expectation_is_multiplicative}\index{expectation!multiplicative},
the covariance of $X$ and $Y$ is
%%
\begin{align*}
\Cov(X, Y)
&=
\E\big[(X - \E[X]) (Y - \E[Y])\big] \\
&=
\E\big[(X - \E[X])\big] \cdot \E\big[(Y - \E[Y])\big] \\
&=
0.
\end{align*}
%%
Combining $\Cov(X, Y) = 0$ with
Theorem~\ref{thm:moments:variance_of_sum_of_random_variables}, we
conclude that
%%
\begin{align*}
\Var[X + Y]
&=
\Var[X] + \Var[Y] + 2 \cdot \Cov(X, Y) \\
&=
\Var[X] + \Var[Y]
\end{align*}
%%
as required.
\end{proof}

For a collection $X_1, \dots, X_n$ of mutually independent random
variables, it can be shown by induction that
\[
\Var\left[\sum_i X_i\right]
=
\sum_i \Var[X_i].
\]

{\color{red}
Compute the variance and standard deviation of the following random
variables:
\begin{itemize}
\item A binomial random variable.

\item A geometric random variable.
\end{itemize}
}
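
One way to carry out the binomial computation uses the results of this
section. Suppose $X$ is a binomial random variable with parameters $n$
and $p$, viewed as the sum $X = \sum_i X_i$ of $n$ mutually independent
Bernoulli\index{random variable!Bernoulli} random variables, each with
$\Pr(X_i = 1) = p$. Then $\E[X_i] = \E[X_i^2] = p$, hence
\[
\Var[X_i]
=
\E[X_i^2] - (\E[X_i])^2
=
p - p^2
=
p(1 - p)
\]
and, by independence,
\[
\Var[X]
=
\sum_i \Var[X_i]
=
np(1 - p),
\qquad
\sigma(X)
=
\sqrt{np(1 - p)}.
\]
For a geometric random variable $X$ with parameter $p$, counting the
number of trials up to and including the first success, a similar (if
longer) computation yields $\Var[X] = (1 - p)/p^2$ and
$\sigma(X) = \sqrt{1 - p}/p$.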


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Chebyshev's inequality}
\index{Chebyshev's inequality}

\begin{theorem}
\label{thm:moments:Chebyshev_inequality}
\index{Chebyshev's inequality}
\textbf{Chebyshev's inequality.}
For any $a > 0$ and a random variable $X$, we have
\[
\Pr\big(|X - \E[X]| \geq a\big)
\leq
\frac{\Var[X]}{a^2}.
\]
\end{theorem}

\begin{proof}
Note that
\[
\Pr\big(|X - \E[X]| \geq a\big)
=
\Pr\big((X - \E[X])^2 \geq a^2\big)
\]
and the random variable $(X - \E[X])^2$ is nonnegative. Use
Markov's\index{Markov's inequality} inequality and the definition of
variance to obtain
\[
\Pr\big((X - \E[X])^2 \geq a^2\big)
\leq
\frac{\E\big[(X - \E[X])^2\big]}{a^2}
=
\frac{\Var[X]}{a^2}
\]
as required.
\end{proof}
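
Consider again $n$ flips of a fair coin and let $X$ count the number
of heads, so that $\E[X] = n/2$. Since $X$ is a binomial random
variable with parameters $n$ and $1/2$, its variance is
$\Var[X] = n \cdot \frac{1}{2} \cdot \frac{1}{2} = n/4$. Chebyshev's
inequality\index{Chebyshev's inequality} then yields
\[
\Pr\left(X \geq \frac{3n}{4}\right)
\leq
\Pr\left(|X - \E[X]| \geq \frac{n}{4}\right)
\leq
\frac{n/4}{(n/4)^2}
=
\frac{4}{n},
\]
a bound that tends to zero as $n$ grows, in contrast with the constant
bound of $2/3$ that Markov's inequality\index{Markov's inequality}
gives for the same event.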

\begin{corollary}
For any $t > 1$ and a random variable $X$, we have
%%
\begin{align*}
\Pr\big(|X - \E[X]| \geq t \cdot \sigma(X)\big)
&\leq
\frac{1}{t^2} \\
\Pr\big(|X - \E[X]| \geq t \cdot \E[X]\big)
&\leq
\frac{\Var[X]}{t^2 (\E[X])^2}.
\end{align*}
\end{corollary}

\begin{proof}
Apply Chebyshev's\index{Chebyshev's inequality} inequality with
$a = t \cdot \sigma(X)$ and with $a = t \cdot \E[X]$, respectively, and
use the definitions of variance\index{variance} and
standard\index{standard deviation} deviation, noting that
$(\sigma(X))^2 = \Var[X]$.
\end{proof}
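
For instance, taking $t = 2$ in the first inequality shows that a
random variable deviates from its mean by at least two standard
deviations with probability at most $1/4$.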


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Problems}

\begin{problem}
\item For any $c \in \R$ and a random variable $X$, show that
  $\Var[cX] = c^2 \Var[X]$.

\item For any two independent random variables $X$ and $Y$, show that
  $\Var[X - Y] = \Var[X] + \Var[Y]$.

\item Let $X_1, \dots, X_n$ be a collection of
  Bernoulli\index{random variable!Bernoulli} random variables and
  write $X = \sum_i X_i$. Show that
  \[
  \E[X^2]
  =
  \sum_i \Pr(X_i = 1) \cdot \E[X \mid X_i = 1].
  \]
  \emph{Hint:} First show that
  \[
  \E[X^2]
  =
  \sum_i \E[X_i X]
  \]
  and then apply conditional\index{expectation!conditional}
  expectations.

\item Consider a collection $X_1, \dots, X_n$ of random variables and
  define a random variable $X$ as $X = \sum_i X_i$. If
  $\E[X_i X_j] = \E[X_i] \cdot \E[X_j]$ for every pair of distinct
  indices $i$ and $j$, show that $\Var[X] = \sum_i \Var[X_i]$.

\item Let $t > 0$ and let $X$ be a random variable with standard
  deviation $\sigma(X)$. Show that
  %%
  \begin{align*}
  \Pr\big(X - \E[X] \geq t \cdot \sigma(X)\big)
  &\leq
  \frac{1}{1 + t^2} \\
  \Pr\big(|X - \E[X]| \geq t \cdot \sigma(X)\big)
  &\leq
  \frac{2}{1 + t^2}.
  \end{align*}
\end{problem}
